In [1]:
from ydata_profiling import ProfileReport
import pandas as pd
In [2]:
df = pd.read_csv("data/NewYork_data.tsv", sep= '\t')
print(df)
C:\Users\kaush\AppData\Local\Temp\ipykernel_4144\3817430330.py:1: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.
df = pd.read_csv("data/NewYork_data.tsv", sep= '\t')
CRASH DATE CRASH TIME BOROUGH ZIP CODE LATITUDE LONGITUDE \
0 09/11/2021 2:39 NaN NaN NaN NaN
1 03/26/2022 11:45 NaN NaN NaN NaN
2 06/29/2022 6:55 NaN NaN NaN NaN
3 09/11/2021 9:35 BROOKLYN 11208.0 40.667202 -73.866500
4 12/14/2021 8:13 BROOKLYN 11233.0 40.683304 -73.917274
... ... ... ... ... ... ...
2075422 03/05/2024 17:22 QUEENS 11436.0 40.680477 -73.792100
2075423 03/05/2024 17:00 BROOKLYN 11204.0 40.610786 -73.978820
2075424 03/03/2024 17:50 NaN NaN 40.675053 -73.947235
2075425 03/05/2024 14:30 BROOKLYN 11207.0 40.677900 -73.892586
2075426 03/05/2024 8:00 QUEENS 11385.0 40.706512 -73.878136
LOCATION ON STREET NAME CROSS STREET NAME \
0 NaN WHITESTONE EXPRESSWAY 20 AVENUE
1 NaN QUEENSBORO BRIDGE UPPER NaN
2 NaN THROGS NECK BRIDGE NaN
3 (40.667202, -73.8665) NaN NaN
4 (40.683304, -73.917274) SARATOGA AVENUE DECATUR STREET
... ... ... ...
2075422 (40.680477, -73.7921) SUTPHIN BOULEVARD FOCH BOULEVARD
2075423 (40.610786, -73.97882) NaN NaN
2075424 (40.675053, -73.947235) SAINT MARKS AVENUE NaN
2075425 (40.6779, -73.892586) MILLER AVENUE FULTON STREET
2075426 (40.706512, -73.878136) EDSALL AVENUE 73 STREET
OFF STREET NAME ... CONTRIBUTING FACTOR VEHICLE 2 \
0 NaN ... Unspecified
1 NaN ... NaN
2 NaN ... Unspecified
3 1211 LORING AVENUE ... NaN
4 NaN ... NaN
... ... ... ...
2075422 NaN ... Unspecified
2075423 161 AVENUE O ... Unspecified
2075424 NaN ... Unspecified
2075425 NaN ... NaN
2075426 NaN ... Unspecified
CONTRIBUTING FACTOR VEHICLE 3 CONTRIBUTING FACTOR VEHICLE 4 \
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
... ... ...
2075422 NaN NaN
2075423 Unspecified Unspecified
2075424 NaN NaN
2075425 NaN NaN
2075426 NaN NaN
CONTRIBUTING FACTOR VEHICLE 5 COLLISION_ID \
0 NaN 4455765
1 NaN 4513547
2 NaN 4541903
3 NaN 4456314
4 NaN 4486609
... ... ...
2075422 NaN 4707511
2075423 NaN 4707419
2075424 NaN 4707855
2075425 NaN 4707872
2075426 NaN 4707447
VEHICLE TYPE CODE 1 \
0 Sedan
1 Sedan
2 Sedan
3 Sedan
4 NaN
... ...
2075422 Station Wagon/Sport Utility Vehicle
2075423 Ambulance
2075424 Station Wagon/Sport Utility Vehicle
2075425 Station Wagon/Sport Utility Vehicle
2075426 Sedan
VEHICLE TYPE CODE 2 VEHICLE TYPE CODE 3 \
0 Sedan NaN
1 NaN NaN
2 Pick-up Truck NaN
3 NaN NaN
4 NaN NaN
... ... ...
2075422 Station Wagon/Sport Utility Vehicle NaN
2075423 PK Van
2075424 PK NaN
2075425 NaN NaN
2075426 Station Wagon/Sport Utility Vehicle NaN
VEHICLE TYPE CODE 4 VEHICLE TYPE CODE 5
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
... ... ...
2075422 NaN NaN
2075423 PK NaN
2075424 NaN NaN
2075425 NaN NaN
2075426 NaN NaN
[2075427 rows x 29 columns]
In [3]:
profile = ProfileReport(df, title = "New York Data Report")
In [4]:
profile.to_notebook_iframe()
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]